clear

set seed 1073741823

/* First session: import the raw wave-1 export */
	insheet using "${pathdata_robustness}/RobustnessExperiment3_raw_wave1.csv", names

	* Label the product for which a treatment 2 respondent entered a guess.
	* treatment and the guesses are compared as strings here; numeric conversion
	* only happens at the destring further down. When several guesses are
	* non-empty, the product listed last wins.
	replace product = ""
	foreach item in videogame bicycle restaurant {
		replace product = "`item'" if p_guess_`item' != "" & treatment == "2"
	}

	tab product if treatment == "2"
	rename prolific_pid PROLIFIC_PID

	* Remove the first two rows (presumably non-data header rows of the export, confirm),
	* then every row lacking writedowninfo or a respondent id.
	drop in 1/2
	drop if writedowninfo == "" | PROLIFIC_PID == ""

	* Keep only the first row per respondent id
	duplicates drop PROLIFIC_PID, force

/* Strip the stray '}' character from the guess fields so they can be converted to numbers */
	foreach item in videogame bicycle restaurant {
		replace p_guess_`item' = subinstr(p_guess_`item', "}", "", .)
	}

	* Convert every column that holds only numeric content
	destring *, replace

	
// Page Times
// page_time_statistic / page_time_story copy time_<product> from the product whose
// condition is a statistic / short-story arm; they stay 0 when no product matches.

	foreach info in statistic story {
		* condition values for the story arm carry the prefix storyshort
		local arm = cond("`info'" == "story", "storyshort", "`info'")

		gen page_time_`info' = 0

		* restaurant is processed last, so it takes precedence when more than one product matches
		foreach item in bicycle videogame restaurant {
			replace page_time_`info' = time_`item' if inlist(condition_`item', "`arm'_pos", "`arm'_neg")
		}
	}




/* Variables to keep */
	keep condition_* p_guess* task* PROLIFIC_PID treatment valence_* ///
		qid9 qid124 qid12 qid18 finished ///
		comprehension1 comprehension3 comprehension5 q202 ///
		screenedout writedowninfo page_time_story page_time_statistic

	* Give the demographic questions readable names
	rename qid9   gender
	rename qid124 age
	rename qid12  education
	rename qid18  employment

	* Indicator Male: 1 for gender code 1, 0 for codes 2 and 3, missing otherwise
	gen male = cond(gender == 1, 1, cond(inlist(gender, 2, 3), 0, .))

	* Indicator Bachelor's degree or more: 1 for education codes 5-8, 0 for codes 1-4, missing otherwise
	gen college = cond(inlist(education, 5, 6, 7, 8), 1, cond(inlist(education, 1, 2, 3, 4), 0, .))

	* Indicator Employment status: 1 for employment codes 1 and 2, 0 for codes 3 and 4, missing otherwise
	gen employed = cond(inlist(employment, 1, 2), 1, cond(inlist(employment, 3, 4), 0, .))

	* Indicator wave 1
	gen wave1 = 1

	* Indicator finished wave 1 - EMPLOYMENT IS THE LAST MANDATORY QUESTION IN THE SURVEY,
	* so any valid employment answer (codes 1-4) counts as completion
	gen completed_wave1 = inlist(employment, 1, 2, 3, 4)

	* Remove the guess and condition columns of products that are not carried forward
	drop p_guess_book p_guess_club p_guess_soda ///
		condition_restauranta condition_restaurantb condition_restaurantc ///
		condition_book condition_club condition_soda

	* Keep only the first row per respondent id
	duplicates drop PROLIFIC_PID, force

/* Wide to long: one row per respondent and product suffix */
reshape long p_guess_ task_ condition_, i(PROLIFIC_PID) j(product) string

save "${pathdata_robustness}/temp/RobustnessExperiment3_long.dta", replace

clear

/* Second (recall) session: import the raw wave-2 export */
insheet using "${pathdata_robustness}/RobustnessExperiment3_raw_wave2.csv", names

* Remove the first two rows (presumably non-data header rows of the export, confirm)
drop in 1/2

	rename prolific_pid PROLIFIC_PID

	/* Variables to keep */
	keep beliefs_* recall_* PROLIFIC_PID screenedout valence_*
	drop if PROLIFIC_PID == ""

	* A respondent id that occurs more than once is removed entirely (no copy is kept).
	* The data end up sorted by PROLIFIC_PID.
	bysort PROLIFIC_PID: drop if _N > 1

	* Indicator wave 2
	gen wave2 = 1

/* Wide to long: one row per respondent and product suffix */
	reshape long beliefs_ recall_ valence_, i(PROLIFIC_PID) j(product) string

save "${pathdata_robustness}/temp/RobustnessExperiment3_long_recall.dta", replace

/* Merged: wave-2 rows in memory (master) joined to the wave-1 long file (using) */
merge 1:1 PROLIFIC_PID product using "${pathdata_robustness}/temp/RobustnessExperiment3_long.dta"

* ########## CREATE DATA ON ATTRITION IN BETWEEN SUBJECT TREATMENTS ##########

	* completed = 1 when the respondent-product row was found in both waves
	gen completed = (_merge == 3)

	* between-subject treatment indicators (0 when treatment is missing)
	gen product1 = (treatment == 2)
	gen product6 = (treatment == 3)

	* ##### Generate dataset with treatments and completion only
preserve
	* Drop rows present only in the wave-2 file, and the products restauranta,
	* restaurantb and restaurantc, which only occur in the first wave and can
	* thus never be matched
	drop if _merge == 1 | inlist(product, "restauranta", "restaurantb", "restaurantc")

	keep PROLIFIC_PID completed product6 product1
	duplicates drop

	save "${pathdata_summary}/ProductAttrition.dta", replace
restore

* From here on only rows matched across both waves are used
keep if _merge == 3
drop _merge product1 product6

destring *, replace

/* Exclusion Criteria */
drop if screenedout == "True"

save "${pathdata_robustness}/temp/RobustnessExperiment3_merged.dta", replace


/* Auxiliary variables */
* storyshort = 1 for rows in either short-story condition, 0 otherwise.
* The full variable name condition_ is used so this does not depend on variable abbreviation.
gen storyshort = 0
replace storyshort = 1 if inlist(condition_, "storyshort_pos", "storyshort_neg")

/* Variables for analysis */

/* Ad-hoc fix (weird "}" in some data): convert p_guess_ to numeric, ignoring any "}" */
destring p_guess_, replace ignore("}")


/* 1 - Effect variables */
* Distance of the wave-1 guess and of the wave-2 belief from 50
gen diff = 50 - p_guess_
gen diff_recall = 50 - beliefs_

* effect: p_guess_ - 50 in the *_pos conditions, 50 - p_guess_ in all other conditions
gen effect = diff
replace effect = p_guess_ - 50 if inlist(condition_, "storyshort_pos", "statistic_pos")

* effect_recall: the same sign convention applied to beliefs_
gen effect_recall = diff_recall
replace effect_recall = beliefs_ - 50 if inlist(condition_, "storyshort_pos", "statistic_pos")


/* Valence recall */
* 1 when valence_ is 1 in a *_pos condition or 2 in a *_neg condition, 0 otherwise
gen valence_recall = 0
* if constraining valence recall to those who actually were asked to recall valence, i.e. who did not select noinfo as type of information remembered
//replace valence_recall = . if valence_ == .
replace valence_recall = 1 if valence_ == 1 & inlist(condition_, "statistic_pos", "storyshort_pos")
replace valence_recall = 1 if valence_ == 2 & inlist(condition_, "statistic_neg", "storyshort_neg")

/* Recall type of information */
* 1 when recall_ matches the condition (1 with noinfo, 2 with statistic, 3 with storyshort),
* missing when recall_ is missing, 0 otherwise
gen type_recall = 0
replace type_recall = . if recall_ == .
replace type_recall = 1 if condition_ == "noinfo" & recall_ == 1
replace type_recall = 1 if inlist(condition_, "statistic_pos", "statistic_neg") & recall_ == 2
replace type_recall = 1 if inlist(condition_, "storyshort_neg", "storyshort_pos") & recall_ == 3

/* Combined recall */
* 1 when both type and valence were recalled; in the noinfo condition the type alone is sufficient
gen correct_recall = 0
replace correct_recall = 1 if type_recall == 1 & valence_recall == 1
replace correct_recall = 1 if condition_ == "noinfo" & type_recall == 1

* NOTE(review): no matching restore is visible in this section
preserve
/* Drop missing products and the noinfo condition */
drop if p_guess_ == . | condition_ == "noinfo"

save "${pathdata_robustness}/RobustnessExperiment3.dta", replace



* ##### Save data for summary statistics: distinct combinations of id and demographics
	keep PROLIFIC_PID male age college employed
	rename PROLIFIC_PID prolific_pid

	duplicates drop

	save "${pathdata_summary}/ProductSummary.dta", replace
